from pyspark import SparkConf,SparkContext
from pyspark.sql import SparkSession
from pyspark.sql import Row
spark = SparkSession.builder.appName("python spark SQL basic example").getOrCreate()

sc = spark.sparkContext

userrdd = sc.textFile("hdfs://master:9000/u01/bigdata/user_test.txt").map(lambda line: line.split(","))

df = userrdd.map(lambda fields: Row(userid = fields[0],age = int(fields[1]),gender = fields[2],occupation = fields[3],zipcode = fields[4]))

schemauser = spark.createDataFrame(df)
schemauser.createOrReplaceTempView("user")
schemauser.describe("userid","age","gender","occupation","zipcode").show()
